package com.yjx.config;

import lombok.extern.slf4j.Slf4j;
import org.apache.poi.xwpf.usermodel.*;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;

import javax.xml.transform.*;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.*;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;


/*
 * word内容段落、公式xml标签
 * 正常文本：
 * <w:r> 标签对应一个 XWPFRun对象
 * <w:t xml:space="preserve"> 标签对应 Word中的一段字符（也可以是一个字符）
 * 公式：
 * <w:object> 标签对应一个公式（当然我们这里只讲公式，此标签中也可以是一个 Excel也可以是一个 PPT等等）
 * <v:shape> 标签中有个 style属性，这里 style就是图片在 Word中显示的宽高
 * <v:imagedata> 标签关联着显示的图片（ <v:imagedata>为 <v:shape>子标签）
 * <o:OLEObject>标签关联着图片显示公式对应的二进制文件（二进制文件也是最重要的文件，没有这个文件当你在word中双击时，是打不开第三方公式插件的）
 *
 */
/**
 * @description:
 * @author: Han LiDong
 * @create: 2021/5/7 10:33
 * @update: 2021/5/7 10:33
 */
@Slf4j
public class GatWord {
    String A = "{F}=/frac{3000×{/beta }×{T}×/left({D}+{d}/right)}{{/mu }×{Z}×/left({{D}}^{2}+{D}×{d}+{{d}}^{2}/right)}={ }{e}";

    public static void main(String[] args) throws Exception {
//        MultipartFile file = null;
//        InputStream inputStream = file.getInputStream();
        // 所有公式latex表达式集合,借助mathJax可以在页面上进行展示
        List <String> formulas = getFormulaMap(new FileInputStream("D:/测试.docx"));
        log.info("解析到{}个公式",formulas.size());
        // word解析 公式+文本
        wordAnalysis(new FileInputStream("D:/测试.docx"));
    }
    /**
     * word所有内容解析(公式、文本）
     * @param inputStream
     * @throws Exception
     */
    public static void wordAnalysis(InputStream inputStream) throws Exception {
        XWPFDocument word = new XWPFDocument(inputStream);
        try {
            for (IBodyElement ibodyelement : word.getBodyElements()) {
                if (ibodyelement.getElementType().equals(BodyElementType.PARAGRAPH)) {  //段落
                    XWPFParagraph paragraph = (XWPFParagraph) ibodyelement;
                    //段落解析
                    String paragraphStr = parseParagraph(paragraph);
                } else if (ibodyelement.getElementType().equals(BodyElementType.TABLE)) {   //表格
                    XWPFTable table = (XWPFTable) ibodyelement;
                    for (XWPFTableRow row : table.getRows()) {  //行
                        for (XWPFTableCell cell : row.getTableCells()) {    //cell
                            List<String> cellMath = new ArrayList<>(16);
                            for (XWPFParagraph paragraph : cell.getParagraphs()) {  //段落
                                //cell段落解析
                                String paragraphStr = parseParagraph(paragraph);
                                if (!"".equals(paragraphStr.trim())){
                                    cellMath.add(paragraphStr);
                                }
                            }
                            log.info("当前cell有{}个公式",cellMath.size());
                        }
                    }
                }
            }

        } finally {
            word.close();
        }
    }


    /**
     * 解析word中公式（转换成latex表达式）
     *
     * @param inputStream  文件流
     * @return
     */
    public static List<String> getFormulaMap(InputStream inputStream) throws IOException, DocumentException {
        //XWPFDocument xwpfDocument = new XWPFDocument(inputStream);
        Map<Integer, String> result = new HashMap<>();
        XWPFDocument word = new XWPFDocument(inputStream);
        //storing the found MathML in a AllayList of strings
        List<String> mathMLList = new ArrayList<String>(16);
        try {
            for (IBodyElement ibodyelement : word.getBodyElements()) {
                if (ibodyelement.getElementType().equals(BodyElementType.PARAGRAPH)) {  //段落
                    XWPFParagraph paragraph = (XWPFParagraph) ibodyelement;
                    //段落解析
                    List<String> mathList = parseMathParagraph(paragraph);
                    mathMLList.addAll(mathList);
                } else if (ibodyelement.getElementType().equals(BodyElementType.TABLE)) { //woed表格
                    XWPFTable table = (XWPFTable) ibodyelement;
                    for (XWPFTableRow row : table.getRows()) {
                        for (XWPFTableCell cell : row.getTableCells()) {
                            List<String> cellMath = new ArrayList<>(16);
                            for (XWPFParagraph paragraph : cell.getParagraphs()) {
                                //cell段落解析
                                List<String> mathList = parseMathParagraph(paragraph);
                                mathMLList.addAll(mathList);
                                //cellMath.addAll(mathList);
                            }
                        }
                    }
                }
            }
        } finally {
            word.close();
        }
        log.info("当前文档一共有{}个公式",mathMLList.size());
        return mathMLList;
    }



    /**
     * 公式段落解析
     * @param xwpfParagraph
     * @throws DocumentException
     */
    public static List<String> parseMathParagraph(XWPFParagraph xwpfParagraph) throws DocumentException {
        CTP ctp = xwpfParagraph.getCTP();
        String xmlText = ctp.xmlText();
        List<String > mathList = new ArrayList<>();
        if (xmlText.contains("<m:oMath>")) {
            //得到根节点的值
            SAXReader saxReader = new SAXReader();
            //将String类型的字符串转换成XML文本对象
            Document doc = saxReader.read(new ByteArrayInputStream(xmlText.getBytes()));
            Element root = doc.getRootElement();
            // 一个段落多个表达式解析
            List<Element> omMaths = root.selectNodes("//m:oMath");    //用xpath得到OMML节点
            for (Element ele : omMaths) {
                /**
                 * OMML -> MathML -> LaTex
                 * Office在安装目录中提供了将OMML转为MathML的xsl工具：MML2OMML.XSL
                 * MathML转LaTex使用网上找到另一个xsl工具mmltex.xsl。
                 */
                String xml = ele.asXML();
                //xml转 mathml/**
                //     * 公式段落解析
                //     * @param xwpfParagraph
                //     * @throws DocumentException
                //     */
                //    public static List<String> parseMathParagraph(XWPFParagraph xwpfParagraph) throws DocumentException {
                //        CTP ctp = xwpfParagraph.getCTP();
                //        String xmlText = ctp.xmlText();
                //        List<String > mathList = new ArrayList<>();
                //        if (xmlText.contains("<m:oMath>")) {
                //            //得到根节点的值
                //            SAXReader saxReader = new SAXReader();
                //            //将String类型的字符串转换成XML文本对象
                //            Document doc = saxReader.read(new ByteArrayInputStream(xmlText.getBytes()));
                //            Element root = doc.getRootElement();
                //            // 一个段落多个表达式解析
                //            List<Element> omMaths = root.selectNodes("//m:oMath");    //用xpath得到OMML节点
                //            for (Element ele : omMaths) {
                //                /**
                //                 * OMML -> MathML -> LaTex
                //                 * Office在安装目录中提供了将OMML转为MathML的xsl工具：MML2OMML.XSL
                //                 * MathML转LaTex使用网上找到另一个xsl工具mmltex.xsl。
                //                 */
                //                String xml = ele.asXML();
                //                //xml转 mathml
                //                String mml = convertOMML2MML(xml);
                //                //mathml转latx
                //                String latex = convertMML2Latex(mml);
                //                mathList.add(latex);
                //                log.info("late表达式：{}" , latex);
                //            }
                //        }
                //        return mathList;
                //    }
                String mml = convertOMML2MML(xml);
                //mathml转latx
                String latex = convertMML2Latex(mml);
                mathList.add(latex);
                log.info("late表达式：{}" , latex);
            }
        }
        return mathList;
    }


     String late = "";

    /**
     * 段落解析
     * @param xwpfParagraph
     * @throws DocumentException
     */
    public static String parseParagraph(XWPFParagraph xwpfParagraph) throws DocumentException {
        CTP ctp = xwpfParagraph.getCTP();
        String xmlText = ctp.xmlText();
        StringBuilder sb = new StringBuilder();
//        if (xmlText.contains("<m:oMath>")) {

        //段落文本内容
        sb.append(xwpfParagraph.getParagraphText());
        //段落公式解析
        //得到根节点的值
        SAXReader saxReader = new SAXReader();
        //将String类型的字符串转换成XML文本对象
        Document doc = saxReader.read(new ByteArrayInputStream(xmlText.getBytes()));
        Element root = doc.getRootElement();
        // 一个段落多个表达式解析
        List<Element> omMaths = root.selectNodes("//m:oMath");    //用xpath得到OMML节点
        String latex = "{F}=\\frac{3000×{r}×{T}×\\left({D}+{d}\\right)}{{k}×{Z}×\\left({{D}}^{2}+{D}×{d}+{{d}}^{2}\\right)}={ }{e}";
        if (omMaths != null && !omMaths.isEmpty()) {
            for (Element ele : omMaths) {
                /**
                 * OMML -> MathML -> LaTex
                 * Office在安装目录中提供了将OMML转为MathML的xsl工具：MML2OMML.XSL
                 * MathML转LaTex使用网上找到另一个xsl工具mmltex.xsl。
                 */
                String xml = ele.asXML();
                //xml转 mathml
                String mathml = convertOMML2MML(xml);
                //mathml转latx
                latex = convertMML2Latex(mathml);
                sb.append(latex);
                log.info("latex表达式：{}",latex);
            }
        }
        int number = sb.toString().indexOf(":");
        Map<String, String> valuesMap = new HashMap<>();

        String[] equations = sb.toString().substring(number+1).split(",");
        for (String eq : equations) {
            String[] parts = eq.split("=");
            if (parts.length == 2) {
                String key = parts[0].trim();
                String value = parts[1].trim();
                valuesMap.put(key, value);
            }
        }
        // 打印Map中的键值对
        String s = "";
        for (Map.Entry<String, String> entry : valuesMap.entrySet()) {
            System.out.println("key:"+entry.getKey() + " : " +"value:"+ entry.getValue());
//            s = latex.replaceAll(entry.getKey(),entry.getValue());
//            System.out.println(s);


        }



        log.info("公式个数：{},解析内容：{}",omMaths.size(),sb.toString());
        return sb.toString();
    }

    /**
     * Description: xsl转换器</p>
     * @param s  公式xml字符串
     * @param xslpath   转换器路径
     * @param uriResolver xls依赖文件
     * @return
     */
    public static String xslConvert(String s, String xslpath, URIResolver uriResolver){
        TransformerFactory tFac = TransformerFactory.newInstance();
        if(uriResolver != null) {
            tFac.setURIResolver(uriResolver);
        }
        StreamSource xslSource = new StreamSource(GatWord.class.getResourceAsStream(xslpath));
        StringWriter writer = new StringWriter();
        try {
            Transformer t = tFac.newTransformer(xslSource);
            Source source = new StreamSource(new StringReader(s));
            Result result = new StreamResult(writer);
            t.transform(source, result);
        } catch (TransformerException e) {
            log.error(e.getMessage(), e);
        }
        return writer.getBuffer().toString();
    }

    /**
     * <p>Description: 将mathml转为latx </p>
     * @param mml mathml字符串
     * @return
     */
    public static String convertMML2Latex(String mml){
        mml = mml.substring(mml.indexOf("?>")+2, mml.length()); //去掉xml的头节点
        URIResolver r = new URIResolver(){  //设置xls依赖文件的路径
            @Override
            public Source resolve(String href, String base) throws TransformerException {
                File f = new File("/conventer/mml2tex/" + href);
                InputStream inputStream = GatWord.class.getResourceAsStream("/conventer/mml2tex/" + href);
                return new StreamSource(inputStream);
            }
        };
        String latex = xslConvert(mml, "/conventer/mml2tex/mmltex.xsl", r);
        if(latex != null && latex.length() > 1){
            latex = latex.substring(1, latex.length() - 1);
        }
        return latex;
    }
    /**
     * <p>Description: office xml转为mathml </p>
     * @param xml  公式xml
     * @return
     */
    public static String convertOMML2MML(String xml){
        // 进行转换的过程中需要借助这个文件,一般来说本机安装office就会有这个文件,找到就可以
        String result = xslConvert(xml, "/conventer/OMML2MML.XSL", null);
        return result;
    }

}


